# Load the saved workspace; `df_combine` (used right below) is presumably
# provided by this .Rdata file.
load("./data/df_combine.Rdata")

# Derive the exercise variables used by the rest of the analysis:
#   exercise       - coerced to numeric
#   total_exercise - exercise multiplied by exercise_time
#   gender         - converted to a factor
df_exercise <- df_combine %>%
  mutate(
    exercise = as.numeric(exercise),
    total_exercise = exercise * exercise_time,
    gender = as.factor(gender)
  )
# Per-level TB summary: for each distinct total_exercise value, add up the
# 0/1 TB indicator (tb_sum) and divide by the group size (incidence).
df_incidence <- df_exercise %>%
  mutate(
    # Recode "Yes"/"No" to "1"/"0", then pass through character so that
    # as.numeric() yields the 0/1 values instead of the factor level codes.
    tb = as.numeric(as.character(fct_recode(tb, "1" = "Yes", "0" = "No")))
  ) %>%
  group_by(total_exercise) %>%
  summarise(
    tb_sum = sum(tb),
    incidence = tb_sum / n()
  ) %>%
  ungroup()

# Attach the per-level summary columns (tb_sum, incidence) to every row.
df_exercise <- inner_join(df_exercise, df_incidence, by = "total_exercise")
# Survival outcome stored as a column: time is `days`, the event indicator is
# tb == "Yes".
df_exercise$survival <- with(df_exercise, Surv(days, tb == "Yes"))

# Overall Kaplan-Meier fit (no grouping), log-log confidence intervals.
km <- survfit(survival ~ 1, data = df_exercise, conf.type = "log-log")

# Kaplan-Meier curves stratified by exercise level, rendered interactively.
km_by_exercise <- survfit(
  survival ~ exercise,
  data = df_exercise,
  conf.type = "log-log"
)
plot_exercise_level <- GGally::ggsurv(
  km_by_exercise,
  main = "Kaplan-Meier Curve for getting TB of different exercise level"
)
plotly::ggplotly(plot_exercise_level)

# Kaplan-Meier curves stratified by exercise time, rendered interactively.
km_by_exercise_time <- survfit(
  survival ~ exercise_time,
  data = df_exercise,
  conf.type = "log-log"
)
plot_exercise_time <- GGally::ggsurv(
  km_by_exercise_time,
  main = "Kaplan-Meier Curve for getting TB of different exercise time"
)
plotly::ggplotly(plot_exercise_time)
The plots show that people with a lower exercise level have a higher probability of getting TB, which suggests that exercise is important for preventing TB. However, people who exercise for more than 2 hours also have a higher probability of getting TB. This may be because people who spend more time exercising are more likely to come into contact with other people and become infected.
# Bar chart of total_exercise against age (dmage), one panel per gender.
#
# geom_col() draws bars whose heights are taken directly from the data, which
# is what geom_histogram(stat = "identity") was being used for. Unlike
# geom_histogram() it carries no binning parameters, so ggplot2 no longer
# emits "Ignoring unknown parameters: binwidth, bins, pad".
#
# NOTE(review): rows sharing the same age are stacked, so each bar's height is
# the sum of total_exercise at that age rather than the average mentioned in
# the title -- confirm which one is intended.
plot_exer <- ggplot(
  df_exercise,
  aes(x = dmage, y = total_exercise, colour = dmage)
) +
  geom_col(width = 0.6) +
  labs(
    title = "The average exercise vs age",
    x = "age"
  ) +
  theme(
    # The x-axis title and tick labels are blanked here, so the "age" label
    # set in labs() is not drawn on the axis.
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.y = element_text(face = "bold", size = 12),
    axis.text.y = element_text(angle = 0, vjust = 0.5, size = 10),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 12, face = "bold")
  ) +
  facet_wrap(~gender)

# Namespaced call, matching the plotly::ggplotly() calls used for the
# Kaplan-Meier plots earlier in this file.
plotly::ggplotly(plot_exer)